Tutoriais https://www.datacamp.com/tutorial/pca-analysis-r https://rpkgs.datanovia.com/factoextra/index.html http://www.sthda.com/english/articles/31-principal-component-methods-in-r-practical-guide/114-mca-multiple-correspondence-analysis-in-r-essentials/ https://statisticsglobe.com/pca-before-k-means-clustering-r>
Bibliotecas
library(factoextra)
library(caret)
Loading required package: lattice
Attaching package: ‘lattice’
The following object is masked from ‘package:clusterProfiler’:
dotplot
Attaching package: ‘caret’
The following object is masked from ‘package:purrr’:
lift
library(stats)
library(ggfortify)
library(ggplot2)
# Criar um PCA com o p-valor de cada gene
pca_data <- prcomp(t(assay(dds))) # Perform PCA on the transposed count or normalized data
# Create a dataframe with PCA components and Timepointf and FirstSecondDosef
pca_df <- data.frame(PC1 = pca_data$x[, 1], PC2 = pca_data$x[, 2],
Timepoint = colData(dds)$Timepoint,
Vaccines = colData(dds)$Vaccine)
# Plot the PCA using ggplot2
ggplot(pca_df, aes(x = PC1, y = PC2, color = Vaccines, shape = Timepoint)) +
geom_point(size = 3) +
labs(title = "Principal component analysis - Vaccine-Timepoint", x = "PC1", y = "PC2") +
theme_minimal()
#DESeq2 native analysis
vsd <- vst(dds, blind=FALSE)
pcaData = plotPCA(vsd,intgroup=c("Timepoint", "Vaccine"), returnData = TRUE)
percentVar <- round(100 * attr(pcaData, "percentVar"))
pca_plot = ggplot(pcaData, aes(PC1, PC2, color=Vaccine, shape=Timepoint)) +
geom_point(size=2) +
xlab(paste0("PC1: ",percentVar[1],"% variance")) +
ylab(paste0("PC2: ",percentVar[2],"% variance")) +
coord_fixed() + ggtitle("Principal component analysis - Vaccines and Timepoint")
ggsave(pca_plot, file = "GSE206023_PCA.png")
Padronizar
install.packages("janitor")
library(janitor)
#Inputs
degs_allstudies_updown = degs_allstudies_updown_filter %>%
select(-ends_with(".y")) %>%
rename_all(~sub("\\.x$", "", .)) %>%
clean_names() %>%
rename(lfcse = lfc_se,
l2fc = log2fold_change) %>%
select(- set_size_intervals) %>%
group_by(study) %>%
select(vaccine, study, gse_id, genes, process, gene_set_short, immune_system, immune_sub_system, immune_tissue, go_term, set_size, everything(), filter) %>%
mutate(study = ifelse(study == "ChAd (V2, D2)", "ChAd (V2, D3)", study)) %>%
distinct() %>%
filter(filter == "p<0.05")
#Salvar
write.csv(degs_allstudies_updown, file = "ImmuneGO_degs_allstudies_updown_filter_p005.csv")
Separar processos imunológicos em objetos
############## Innate
Immune_GO_innate = ImmuneGO_Annotated_Genes %>% #250 genes (SetSize)
filter(process == "INNATE IMMUNE RESPONSE" &
((gse_id %in% c("GSE199750", "GSE201533") & padj <= 0.05) |
(gse_id == "GSE206023" & pvalue <= 0.05)))
Immune_GO_Complement = degs_allstudies_updown %>% #27 genes (SetSize)
filter(process == "COMPLEMENT ACTIVATION" &
((gse_id %in% c("GSE199750", "GSE201533") & padj <= 0.05) |
(gse_id == "GSE206023" & pvalue <= 0.05)))
Immune_GO_innate_Inflammatory = degs_allstudies_updown %>% #14 genes (SetSize)
filter(process == "INFLAMMATORY RESPONSE TO ANTIGENIC STIMULUS" &
((gse_id %in% c("GSE199750", "GSE201533") & padj <= 0.05) |
(gse_id == "GSE206023" & pvalue <= 0.05)))
Immune_GO_innate_DendriticCells = degs_allstudies_updown %>% #11 genes (SetSize)
filter(process %in% c("DENDRITIC CELL CHEMOTAXIS",
"DENDRITIC CELL HOMEOSTASIS",
"DENDRITIC CELL MIGRATION",
"MYELOID DENDRITIC CELL ACTIVATION",
"MYELOID DENDRITIC CELL CHEMOTAXIS",
"MYELOID DENDRITIC CELL DIFFERENTIATION",
"PLASMACYTOID DENDRITIC CELL ACTIVATION",
"PLASMACYTOID DENDRITIC CELL DIFFERENTIATION") &
((gse_id %in% c("GSE199750", "GSE201533") & padj <= 0.05) |
(gse_id == "GSE206023" & pvalue <= 0.05)))
Immune_GO_innate_Macrophages = degs_allstudies_updown %>%
filter(process %in% c("MACROPHAGE ACTIVATION",
"MACROPHAGE ACTIVATION INVOLVED IN IMMUNE RESPONSE",
"MACROPHAGE CHEMOTAXIS",
"MACROPHAGE HOMEOSTASIS",
"MACROPHAGE MIGRATION") &
((gse_id %in% c("GSE199750", "GSE201533") & padj <= 0.05) |
(gse_id == "GSE206023" & pvalue <= 0.05)))
Immune_GO_innate_APP = degs_allstudies_updown %>% #21 genes (SetSize)
filter(process == "ANTIGEN PROCESSING AND PRESENTATION" &
((gse_id %in% c("GSE199750", "GSE201533") & padj <= 0.05) |
(gse_id == "GSE206023" & pvalue <= 0.05)))
Immune_GO_innate_TLR = degs_allstudies_updown %>% #14 genes (SetSize)
filter(process == "TOLL-LIKE RECEPTOR SIGNALING PATHWAY" &
((gse_id %in% c("GSE199750", "GSE201533") & padj <= 0.05) |
(gse_id == "GSE206023" & pvalue <= 0.05)))
Immune_GO_innate_PRR = degs_allstudies_updown %>% #8 genes (SetSize)
filter(process == "PATTERN RECOGNITION RECEPTOR SIGNALING PATHWAY" &
((gse_id %in% c("GSE199750", "GSE201533") & padj <= 0.05) |
(gse_id == "GSE206023" & pvalue <= 0.05)))
Immune_GO_innate_CPR = degs_allstudies_updown %>% #6 genes (SetSize)
filter(process == "CYTOSOLIC PATTERN RECOGNITION RECEPTOR SIGNALING PATHWAY" &
((gse_id %in% c("GSE199750", "GSE201533") & padj <= 0.05) |
(gse_id == "GSE206023" & pvalue <= 0.05)))
############## Adaptive
Immune_GO_adaptive = degs_allstudies_updown %>% #330 genes (SetSize)
filter(process == "ADAPTIVE IMMUNE RESPONSE" &
((gse_id %in% c("GSE199750", "GSE201533") & padj <= 0.05) |
(gse_id == "GSE206023" & pvalue <= 0.05)) &
!(gse_id %in% c("GSE201533")))
#Humoral
Immune_GO_adaptive_humoral = degs_allstudies_updown %>% #27 genes (SetSize)
filter(process == "HUMORAL ADAPTIVE IMMUNE SYSTEM" &
((gse_id %in% c("GSE199750", "GSE201533") & padj <= 0.05) |
(gse_id == "GSE206023" & pvalue <= 0.05)) &
!(gse_id %in% c("GSE201533")))
Immune_GO_adap_IgMediated = degs_allstudies_updown %>% #62 genes (SetSize)
filter(process == "IMMUNOGLOBULIN MEDIATED IMMUNE RESPONSE" &
((gse_id %in% c("GSE199750", "GSE201533") & padj <= 0.05) |
(gse_id == "GSE206023" & pvalue <= 0.05)) &
!(gse_id %in% c("GSE201533")))
Immune_GO_adap_Breceptorsig = degs_allstudies_updown %>% #31 genes (SetSize)
filter(process == "B CELL RECEPTOR SIGNALING PATHWAY" &
((gse_id %in% c("GSE199750", "GSE201533") & padj <= 0.05) |
(gse_id == "GSE206023" & pvalue <= 0.05)) &
!(gse_id %in% c("GSE201533")))
Immune_GO_adap_Bactivation = degs_allstudies_updown %>% #36 genes
filter(process == "B CELL ACTIVATION" &
((gse_id %in% c("GSE199750", "GSE201533") & padj <= 0.05) |
(gse_id == "GSE206023" & pvalue <= 0.05)) &
!(gse_id %in% c("GSE201533")))
#Cellular
Immune_GO_adaptive_cellular = degs_allstudies_updown %>% #41 genes
filter(process == "CELLULAR ADAPTIVE IMMUNE SYSTEM" &
((gse_id %in% c("GSE199750", "GSE201533") & padj <= 0.05) |
(gse_id == "GSE206023" & pvalue <= 0.05)) &
!(gse_id %in% c("GSE201533")))
Immune_GO_adaptive_cellular_tcellactivation = degs_allstudies_updown %>% #41 genes
filter(process == "T CELL ACTIVATION" &
((gse_id %in% c("GSE199750", "GSE201533") & padj <= 0.05) |
(gse_id == "GSE206023" & pvalue <= 0.05)) &
!(gse_id %in% c("GSE201533")))
Immune_GO_adaptive_cellular_treceptor = degs_allstudies_updown %>% #78 genes
filter(process == "T CELL RECEPTOR SIGNALING PATHWAY" &
((gse_id %in% c("GSE199750", "GSE201533") & padj <= 0.05) |
(gse_id == "GSE206023" & pvalue <= 0.05)) &
!(gse_id %in% c("GSE201533")))
Immune_GO_adaptive_cellular_th1 = degs_allstudies_updown %>% #6 genes
filter(process == "T-HELPER 1 TYPE IMMUNE RESPONSE" &
((gse_id %in% c("GSE199750", "GSE201533") & padj <= 0.05) |
(gse_id == "GSE206023" & pvalue <= 0.05)) &
!(gse_id %in% c("GSE201533")))
Immune_GO_adaptive_cellular_th2 = degs_allstudies_updown %>% #5 genes
filter(process == "TYPE 2 IMMUNE RESPONSE" &
((gse_id %in% c("GSE199750", "GSE201533") & padj <= 0.05) |
(gse_id == "GSE206023" & pvalue <= 0.05)) &
!(gse_id %in% c("GSE201533")))
Immune_GO_adaptive_cellular_th17 = degs_allstudies_updown %>% #3 genes
filter(process == "T-HELPER 17 TYPE IMMUNE RESPONSE" &
((gse_id %in% c("GSE199750", "GSE201533") & padj <= 0.05) |
(gse_id == "GSE206023" & pvalue <= 0.05)) &
!(gse_id %in% c("GSE201533")))
ImmuneGO_Genes_interest = bind_rows(
Immune_GO_general_innate,
Immune_GO_general_Complement,
Immune_GO_innate_Inflammatory,
Immune_GO_innate_Macrophages,
Immune_GO_innate_DendriticCells,
Immune_GO_innate_APP,
Immune_GO_innate_TLR,
Immune_GO_innate_PRR,
Immune_GO_innate_CPR,
Immune_GO_adaptive,
Immune_GO_adaptive_humoral,
Immune_GO_adap_IgMediated,
Immune_GO_adap_Breceptorsig,
Immune_GO_adap_Bactivation,
Immune_GO_adaptive_cellular,
Immune_GO_adaptive_cellular_tcellactivation,
Immune_GO_adaptive_cellular_treceptor,
Immune_GO_adaptive_cellular_th1,
Immune_GO_adaptive_cellular_th2,
Immune_GO_adaptive_cellular_th17)
write.csv(ImmuneGO_Genes_interest, file = "ImmuneGO_Genes_interest_bystudy.csv")
Processar dados
# Immune_GO_general_innate
# Immune_GO_adaptive
Immune_GO_general_Complement
Immune_GO_innate_Macrophages
Immune_GO_innate_DendriticCells
Immune_GO_innate_Inflammatory
Immune_GO_innate_APP
Immune_GO_innate_TLR #Não usar
Immune_GO_innate_PRR #(SIGNALING PATHWAY)
Immune_GO_innate_CPR #Não usar
Immune_GO_adaptive_humoral
Immune_GO_adap_IgMediated
Immune_GO_adap_Breceptorsig
Immune_GO_adap_Bactivation
Immune_GO_adaptive_cellular
Immune_GO_adaptive_cellular_tcellactivation
Immune_GO_adaptive_cellular_treceptor
#INPUT
data_genes = Immune_GO_adaptive
filename = "Immune_GO_adaptive"
PCA
#Converter de long para wide
matrix_genes = data_genes %>%
select(study, genes, l2fc) %>%
dcast(`study` ~ `genes`,
value.var = "l2fc",
fun.aggregate = mean) %>%
as.data.frame() %>%
column_to_rownames(var = "study") %>%
replace(is.na(.), 0)
matrix_data_pca = matrix_genes %>%
rownames_to_column(var = "Condition")
matrix_data_pca_ready = matrix_data_pca %>%
column_to_rownames(var = "Condition")
ann_vaccines_pca_matrix = ann_vaccines_pca %>%
merge(matrix_data_pca,
by.x = "Condition",
all.x = F,
all.y = F) %>%
select(Condition:Efficacy)
# Verifique quais colunas têm variância muito baixa
nearZeroVarCols <- nearZeroVar(matrix_data_pca_ready, saveMetrics = TRUE)
matrix_data_pca_ready <- matrix_data_pca_ready[, !nearZeroVarCols$nzv]
pca_res <- prcomp(matrix_data_pca_ready, scale. = TRUE)
# Crie o gráfico de PCA
pca_plot_ann = autoplot(pca_res,
data = ann_vaccines_pca_matrix,
colour = 'Vaccine',
label = 1) + #1: display, 0: hide
labs(title=filename) +
theme_classic()
# scale_color_manual(values = c(BBIBP = "#black",
# ZF2001 = "black",
# BNT = "#56cfe1",
# ChAd = "#80ffdb" ,
# "ChAd-BNT" = "#72efdd"))
pca_plot_not_ann = autoplot(pca_res,
data = ann_vaccines_pca_matrix,
colour = 'Vaccine',
label = 0) + #1: display, 0: hide
labs(title=filename) +
theme_classic() +
stat_ellipse(type = "t")
#Salvar
ggsave(pca_plot_not_ann, filename = paste0(filename, "_PCA_not-ann.png"), width = 10, height = 8)
ggsave(pca_plot_ann, filename = paste0(filename, "_PCA_ann.png"), width = 10, height = 8)
# Exiba o gráfico
print(pca_plot_ann)
print(pca_plot_not_ann)
############### KNN classification
#Determinar o número de clusters para KNN
pca_scores <- data.frame(pca_res$x[, 1:2])
ggp1 <- fviz_nbclust(pca_scores,
FUNcluster = kmeans,
method = "wss")
ggp1
set.seed(123) # Set seed for randomization
kmeans_clust <- kmeans(pca_scores, # Perform k-means clustering
centers = 4) # Definir numero de clusters
#Visualizar clusters
ggp2 <- fviz_pca_ind(pca_res,
habillage = kmeans_clust$cluster,
repel = TRUE,
addEllipses = TRUE,
ellipse.type = "convex",
labelsize = 2) +
guides(color = guide_legend(override.aes = list(label = ""))) +
ggtitle(paste0(filename, "KNN_Clustered.png"))
print(ggp2)
ggsave(ggp2, file = paste0(filename, "KNN_Clustered.png"), width = 5, height = 4)
#Colorido
fviz_pca_var(data.pca, col.var = "cos2",
gradient.cols = c("black", "orange", "green"),
repel = TRUE)
#Colorido
fviz_pca_var(data.pca, col.var = "cos2",
gradient.cols = c("black", "orange", "green"),
repel = TRUE)
#Print
print(fviz_pca_var_genes)
Preparar dados
# Inputs
data = ssgsea_results_unified %>%
filter(!is.na(vaccine),
process != "IMMUNOGLOBULIN MEDIATED IMMUNE RESPONSE")
data_annotation = ann_vaccines_samples_4_12_23 %>%
mutate(day = as.factor(day),
dose = as.factor(dose))
filename = "ssgsea_results_unified"
#Converter de long para wide
#Matriz com anotações
ann_vaccines_pca_matrix = data %>%
mutate(qvalue = as.numeric(qvalue)) %>%
filter(qvalue < 0.10) %>%
select(sample, process, nes) %>%
dcast(., `sample` ~ `process`,
value.var = "nes",
fun.aggregate = mean) %>%
replace(., . == "NaN", 0) %>%
as.data.frame() %>%
merge(data_annotation, by.x = "sample", by.y = "sample", all.x = T, all.y = F)
#Matriz para PCA
ann_vaccines_pca_matrix_ready = ann_vaccines_pca_matrix %>%
select(!condition:previous_vaccination) %>%
column_to_rownames("sample")
# Verifique colunas com variancia baixa
nearZeroVarCols <- nearZeroVar(ann_vaccines_pca_matrix_ready, saveMetrics = TRUE)
ann_vaccines_pca_matrix_ready <- ann_vaccines_pca_matrix_ready[, !nearZeroVarCols$nzv]
pca_res <- prcomp(ann_vaccines_pca_matrix_ready, scale. = TRUE)
# Crie o gráfico de PCA
pca_plot = autoplot(pca_res,
data = ann_vaccines_pca_matrix,
colour = 'vaccine',
label = 0) +
theme_minimal() +
labs(title=filename) +
theme(panel.grid = element_blank()) +
scale_fill_continuous(type = "viridis")
ggplotly(pca_plot)
# Exiba o gráfico
print(pca_plot)
ggsave(pca_plot, filename = paste0(filename, "_", "PCA_Immune_GO_GSEA_not-ann.png"), width = 10, height = 8)
# Crie o gráfico de PCA
###### Condition
pca_plot_condition = autoplot(pca_res,
data = ann_vaccines_pca_matrix,
colour = 'condition',
label = 0) +
theme(panel.grid = element_blank()) +
labs(title=paste0(filename, "_bycondition")) + #scale fill manual
scale_fill_continuous(type = "viridis") +
theme_minimal() +
theme(panel.grid = element_blank())
ggplotly(pca_plot_condition)
###### Vaccine
pca_plot_vac = autoplot(pca_res,
data = ann_vaccines_pca_matrix,
colour = 'vaccine',
label = 0) +
labs(title=paste0(filename, "_byvaccine")) + #scale fill manual
scale_color_manual(values = c("BBIBP" = "#dee2e6",
"BNT" = "#56cfe1",
"ChAd" = "#5e60ce",
"ZF2001" = "#b5179e",
"MO" = "#D7B0EE",
"I" = "#ff4d6d",
"H" = "grey95")) +
theme_minimal() +
theme(panel.grid = element_blank())
###### Day
pca_plot_day = autoplot(pca_res,
data = ann_vaccines_pca_matrix,
colour = 'day',
label = 0) +
labs(title=paste0(filename, "_day")) + #scale fill manual
scale_color_manual(values = c(
"0" = "white",
"1" = "#caf0f8",
"2" = "#ade8f4",
"3" = "#90e0ef",
"4" = "#6CD5EA",
"5" = "#48cae4",
"6" = "#00b4d8",
"7" = "#0096c7",
"10" = "#0087BF",
"14" = "#0077b6",
"26" = "#015BA0",
"28" = "#023e8a",
"51" = "#03045e")) +
theme_minimal() +
theme(panel.grid = element_blank())
###### Dose
pca_plot_dose = autoplot(pca_res,
data = ann_vaccines_pca_matrix,
colour = 'dose',
label = 0) +
labs(title=paste0(filename, "_dose")) +
scale_color_manual(
values = c("0" = "#caf0f8",
"1" = "#56cfe1",
"2" = "#5978d4",
"3" = "#b5179e")) +
theme_minimal() +
theme(panel.grid = element_blank())
# Exiba o gráfico
print(pca_plot_condition)
print(pca_plot_day)
print(pca_plot_dose)
print(pca_plot_vac)
ggsave(pca_plot_condition, filename = paste0(filename, "CONDITION_PCA_Immune_GO_GSEA_not-ann.png"), width = 10, height = 8)
ggsave(pca_plot_vac, filename = paste0(filename, "_VACCINE_PCA_Immune_GO_GSEA_not-ann.png"), width = 10, height = 8)
ggsave(pca_plot_day, filename = paste0(filename, "_DAY_PCA_Immune_GO_GSEA_not-ann.png"), width = 10, height = 8)
ggsave(pca_plot_dose, filename = paste0(filename, "_DOSE_PCA_Immune_GO_GSEA_not-ann.png"), width = 10, height = 8)
############### KNN classification
#Determinar o número de clusters para KNN
pca_scores <- data.frame(pca_res$x[, 1:2])
fviz_nbclust(pca_scores,
FUNcluster = kmeans,
method = "wss")
set.seed(666) # Set seed for randomization
kmeans_clust <- kmeans(pca_scores, # Perform k-means clustering
centers = 3) # Definir numero de clusters
#Visualizar clusters
ggp2 <- fviz_pca_ind(pca_res,
habillage = kmeans_clust$cluster,
repel = TRUE,
addEllipses = TRUE,
ellipse.type = "t",
label = "none",
labelsize = 0) +
guides(color = guide_legend(override.aes = list(label = ""))) +
ggtitle(paste0(filename, "_KNN_Clustered.png")) +
scale_color_brewer(palette="Set1") +
theme(panel.grid = element_blank())
print(ggp2)
ggsave(ggp2, file = paste0(filename, "_KNN_Clustered.png"), width = 5, height = 4)
# Clusterizar por grupo
#Vaccine
pca_group_vac <- fviz_pca_ind(pca_res,
habillage = ann_vaccines_pca_matrix$vaccine,
repel = TRUE,
addEllipses = TRUE,
ellipse.type = "t",
label = "none",
labelsize = 0) +
guides(color = guide_legend(override.aes = list(label = ""))) +
ggtitle(paste0(filename, "_Vac_KNN_Clustered.png")) +
scale_color_brewer(palette="Set1") +
theme(panel.grid = element_blank())
#Dose
pca_group_dose <- fviz_pca_ind(pca_res,
habillage = ann_vaccines_pca_matrix$dose,
repel = TRUE,
addEllipses = TRUE,
ellipse.type = "t",
label = "none",
labelsize = 0) +
guides(color = guide_legend(override.aes = list(label = ""))) +
ggtitle(paste0(filename, "_Dose_KNN_Clustered.png")) +
scale_color_brewer(palette="Set1") +
theme(panel.grid = element_blank())
#Day
pca_group_day <- fviz_pca_ind(pca_res,
habillage = ann_vaccines_pca_matrix$day,
repel = TRUE,
addEllipses = TRUE,
ellipse.type = "t",
label = "none",
labelsize = 0) +
guides(color = guide_legend(override.aes = list(label = ""))) +
ggtitle(paste0(filename, "_Day_KNN_Clustered.png")) +
scale_color_brewer(palette="Set1") +
theme(panel.grid = element_blank())
#Salvar
ggsave(pca_group_day, file = paste0(filename, "_Day_Clustered.png"), width = 5, height = 4)
ggsave(pca_group_dose, file = paste0(filename, "_Dose_Clustered.png"), width = 5, height = 4)
ggsave(pca_group_vac, file = paste0(filename, "_Vac_Clustered.png"), width = 5, height = 4)
#Biplot
biplot = fviz_pca_biplot(pca_res, # Visualize clusters in biplot
col.var = "black",
alpha.var = 0.5,
habillage = kmeans_clust$cluster,
repel = TRUE,
addEllipses = TRUE,
ellipse.type = "convex",
labelsize = 3,
label = "var",
palette = "Set1")
ggsave(biplot, file = paste0(filename, "_BIPLOT_KNN_Clustered.png"), width = 10, height = 8)
########Correlation plot
corr_matrix = cor(ann_vaccines_pca_matrix_ready)
#Plot
corrplot = ggcorrplot(corr_matrix, hc.order = TRUE) +
theme(axis.text.x = element_text(angle = 90, size = 5),
axis.text.y = element_text(size = 5))
#Salvar
ggsave(corrplot, file = paste0(filename, "_corrplot.png"),
width = 4, #Grande 20, pequeno 10
height= 4) #Grande 20, pequeno 10
print(corrplot)
data.pca <- princomp(corr_matrix) #PCA
summary(data.pca) #Retornar PCs
#########Scree plot
data.pca = princomp(corr_matrix) #PCA
summary(data.pca) #Retornar PCs
#########Scree plot
scree_plot = fviz_eig(data.pca,
addlabels = TRUE,
ylim = c(0, 70)) +
geom_col(color = "#00AFBB", fill = "#00AFBB") +
theme_classic()
#Salvar
ggsave(scree_plot, file = paste0(filename, "_GSEA_screeplot.png"), width = 10, height = 3)
print(scree_plot)
#Scree plot
loadings = data.frame(data.pca$loadings[, 1:3])
loadings$Genes = rownames(loadings)
loadings_1 = arrange(loadings, desc(Comp.1))
loadings_2 = arrange(loadings, desc(Comp.2))
loadings_3 = arrange(loadings, desc(Comp.3))
#Plot
loadings_1_plot = ggplot(loadings_1, aes(x = reorder(Genes, -Comp.1), y=Comp.1, fill = Comp.1)) +
ggtitle(paste0("Comp1-Comp2 Genes", filename)) +
ylab("Comp1") +
xlab("Gene sets") +
geom_bar(stat = "identity") +
theme_light() +
theme(axis.text.x = element_text(vjust = 1, hjust = 1, angle = 45, size = 8),
axis.text.y = element_text(size = 5),
plot.title = element_text(size = 15L, hjust = 0.5)) +
scale_fill_continuous(type = "viridis") #cores
print(loadings_1_plot)
loadings_2_plot = ggplot(loadings_2, aes(x = reorder(Genes, -Comp.2), y=Comp.2, fill = Comp.2)) +
ggtitle(paste0("Comp2-Comp3 Genes_", filename)) +
ylab("Comp2") +
xlab("Gene sets") +
geom_bar(stat = "identity") +
theme_light() +
theme(axis.text.x = element_text(vjust = 1, hjust = 1, angle = 45, size = 8),
axis.text.y = element_text(size = 5),
plot.title = element_text(size = 15L, hjust = 0.5)) +
scale_fill_continuous(type = "viridis") #cores
print(loadings_2_plot)
#Salvar
ggsave(loadings_1_plot,
file = paste0(filename, "_GSEA_genescomp1-2.png"),
width = 10, height = 5)
#Salvar
ggsave(loadings_2_plot,
file = paste0(filename, "_GSEA_genescomp2-3.png"),
width = 10, height = 5)
# Graph of the variables
fviz_pca_var_genes = fviz_pca_var(data.pca,
col.ind = "cos2",
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"))
fviz_pca_var_genes
ggsave(fviz_pca_var_genes, file = paste0(filename, "_fviz_pca_var_GSEA.png"), width = 10)
cos2.1 = fviz_cos2(data.pca, choice = "var", axes = 1:2)
cos2.2 = fviz_cos2(data.pca, choice = "var", axes = 2:3)
ggsave(cos2.1, file = paste0(filename, "_cos2_GSEA_1.png"), width = 10)
ggsave(cos2.2, file = paste0(filename, "_cos2_GSEA_2.png"), width = 10)
Preparar dados
############### KNN classification
#Determinar o número de clusters para KNN
pca_scores <- data.frame(pca_res$x[, 1:2])
fviz_nbclust(pca_scores,
FUNcluster = kmeans,
method = "wss")
set.seed(666) # Set seed for randomization
kmeans_clust <- kmeans(pca_scores, # Perform k-means clustering
centers = 2) # Definir numero de clusters
#Visualizar clusters
ggp2 <- fviz_pca_ind(pca_res,
habillage = kmeans_clust$cluster,
repel = TRUE,
addEllipses = TRUE,
ellipse.type = "t",
label = "none",
labelsize = 0) +
guides(color = guide_legend(override.aes = list(label = ""))) +
ggtitle(paste0(filename, "_KNN_Clustered.png")) +
scale_color_brewer(palette="Set1") +
theme(panel.grid = element_blank())
print(ggp2)
ggsave(ggp2, file = paste0(filename, "_KNN_Clustered.png"), width = 5, height = 4)
# Clusterizar por grupo
#Vaccine
pca_group_vac <- fviz_pca_ind(pca_res,
habillage = ann_vaccines_pca_matrix$vaccine,
repel = TRUE,
addEllipses = TRUE,
ellipse.type = "convex",
label = "none",
labelsize = 0) +
guides(color = guide_legend(override.aes = list(label = ""))) +
ggtitle(paste0(filename, "_Vac_KNN_Clustered.png")) +
scale_color_brewer(palette="Set1") +
theme(panel.grid = element_blank())
#Dose
pca_group_dose <- fviz_pca_ind(pca_res,
habillage = ann_vaccines_pca_matrix$dose,
repel = TRUE,
addEllipses = TRUE,
ellipse.type = "convex",
label = "none",
labelsize = 0) +
guides(color = guide_legend(override.aes = list(label = ""))) +
ggtitle(paste0(filename, "_Dose_KNN_Clustered.png")) +
scale_color_brewer(palette="Set1") +
theme(panel.grid = element_blank())
#Day
pca_group_day <- fviz_pca_ind(pca_res,
habillage = ann_vaccines_pca_matrix$day,
repel = TRUE,
addEllipses = TRUE,
ellipse.type = "convex",
label = "none",
labelsize = 0) +
guides(color = guide_legend(override.aes = list(label = ""))) +
ggtitle(paste0(filename, "_Day_KNN_Clustered.png")) +
scale_color_brewer(palette="Set1") +
theme(panel.grid = element_blank())
print(pca_group_day)
print(pca_group_dose)
print(pca_group_vac)
#Salvar
ggsave(pca_group_day, file = paste0(filename, "_Day_Clustered.png"), width = 5, height = 4)
ggsave(pca_group_dose, file = paste0(filename, "_Dose_Clustered.png"), width = 5, height = 4)
ggsave(pca_group_vac, file = paste0(filename, "_Vac_Clustered.png"), width = 5, height = 4)
#Biplot
biplot = fviz_pca_biplot(pca_res, # Visualize clusters in biplot
col.var = "black",
alpha.var = 0.5,
habillage = kmeans_clust$cluster,
repel = TRUE,
addEllipses = TRUE,
ellipse.type = "convex",
labelsize = 3,
label = "var",
palette = "Set1")
ggsave(biplot, file = paste0(filename, "_BIPLOT_KNN_Clustered.png"), width = 10, height = 8)
########Correlation plot
corr_matrix = cor(ann_vaccines_pca_matrix_ready)
#Plot
corrplot = ggcorrplot(corr_matrix, hc.order = TRUE) +
theme(axis.text.x = element_text(angle = 90, size = 5),
axis.text.y = element_text(size = 5))
#Salvar
ggsave(corrplot, file = paste0(filename, "_corrplot.png"),
width = 4, #Grande 20, pequeno 10
height= 4) #Grande 20, pequeno 10
print(corrplot)
data.pca <- princomp(corr_matrix) #PCA
summary(data.pca) #Retornar PCs
#########Scree plot
data.pca = princomp(corr_matrix) #PCA
summary(data.pca) #Retornar PCs
#########Scree plot
scree_plot = fviz_eig(data.pca,
addlabels = TRUE,
ylim = c(0, 70)) +
geom_col(color = "#00AFBB", fill = "#00AFBB") +
theme_classic()
#Salvar
ggsave(scree_plot, file = paste0(filename, "_GSEA_screeplot.png"), width = 10, height = 3)
print(scree_plot)
#Scree plot
loadings = data.frame(data.pca$loadings[, 1:3])
loadings$Genes = rownames(loadings)
loadings_1 = arrange(loadings, desc(Comp.1))
loadings_2 = arrange(loadings, desc(Comp.2))
loadings_3 = arrange(loadings, desc(Comp.3))
#Plot
loadings_1_plot = ggplot(loadings_1, aes(x = reorder(Genes, -Comp.1), y=Comp.1, fill = Comp.1)) +
ggtitle(paste0("Comp1-Comp2 Genes", filename)) +
ylab("Comp1") +
xlab("Gene sets") +
geom_bar(stat = "identity") +
theme_light() +
theme(axis.text.x = element_text(vjust = 1, hjust = 1, angle = 45, size = 8),
axis.text.y = element_text(size = 5),
plot.title = element_text(size = 15L, hjust = 0.5)) +
scale_fill_continuous(type = "viridis") #cores
print(loadings_1_plot)
loadings_2_plot = ggplot(loadings_2, aes(x = reorder(Genes, -Comp.2), y=Comp.2, fill = Comp.2)) +
ggtitle(paste0("Comp2-Comp3 Genes_", filename)) +
ylab("Comp2") +
xlab("Gene sets") +
geom_bar(stat = "identity") +
theme_light() +
theme(axis.text.x = element_text(vjust = 1, hjust = 1, angle = 45, size = 8),
axis.text.y = element_text(size = 5),
plot.title = element_text(size = 15L, hjust = 0.5)) +
scale_fill_continuous(type = "viridis") #cores
print(loadings_2_plot)
#Salvar
ggsave(loadings_1_plot,
file = paste0(filename, "_GSEA_genescomp1-2.png"),
width = 10, height = 5)
#Salvar
ggsave(loadings_2_plot,
file = paste0(filename, "_GSEA_genescomp2-3.png"),
width = 10, height = 5)
# Graph of the variables
fviz_pca_var_genes = fviz_pca_var(data.pca,
col.ind = "cos2",
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"))
fviz_pca_var_genes
ggsave(fviz_pca_var_genes, file = paste0(filename, "_fviz_pca_var_GSEA.png"), width = 10)
cos2.1 = fviz_cos2(data.pca, choice = "var", axes = 1:2)
cos2.2 = fviz_cos2(data.pca, choice = "var", axes = 2:3)
ggsave(cos2.1, file = paste0(filename, "_cos2_GSEA_1.png"), width = 10)
ggsave(cos2.2, file = paste0(filename, "_cos2_GSEA_2.png"), width = 10)
Processar dados
ImmuneGO_Genes = all_degs_p_05_vac_infected_19_12_23 %>%
select(-"...1") %>%
inner_join(ImmuneGO_Annotated_Genes_8_1_24 %>%
select(genes) %>%
distinct(),
by = "genes") %>%
distinct()
nonImmuneGO_Genes = all_degs_p_05_vac_infected_19_12_23 %>%
select(-"...1") %>%
anti_join(ImmuneGO_Annotated_Genes_8_1_24 %>%
select(genes) %>%
distinct(),
by = "genes") %>%
distinct()
all_non_ImmuneGO_Genes = all_degs_p_05_vac_infected_19_12_23 %>%
select(-"...1") %>%
distinct()
#INPUT
data_genes = ImmuneGO_Genes
filename = "ImmuneGO_Genes"
data_annotation = ann_vaccines %>%
mutate(day = as.factor(day),
dose = as.factor(dose))
#Converter de long para wide
#Matriz com anotações
ann_vaccines_pca_matrix = data_genes %>%
select(condition, genes, log2fold_change) %>%
dcast(., `condition` ~ `genes`,
value.var = "log2fold_change",
fun.aggregate = mean) %>%
replace(., . == "NaN", 0) %>%
as.data.frame() %>%
merge(data_annotation %>% mutate(week = as.factor(week)), by = "condition", all.x = T, all.y = F)
ggplotly(pca_plot_condition_lab)
Warning: geom_GeomTextRepel() has yet to be implemented in plotly.
If you'd like to see this geom implemented,
Please open an issue with your example code at
https://github.com/ropensci/plotly/issuesWarning: geom_GeomTextRepel() has yet to be implemented in plotly.
If you'd like to see this geom implemented,
Please open an issue with your example code at
https://github.com/ropensci/plotly/issuesWarning: geom_GeomTextRepel() has yet to be implemented in plotly.
If you'd like to see this geom implemented,
Please open an issue with your example code at
https://github.com/ropensci/plotly/issuesWarning: geom_GeomTextRepel() has yet to be implemented in plotly.
If you'd like to see this geom implemented,
Please open an issue with your example code at
https://github.com/ropensci/plotly/issuesWarning: geom_GeomTextRepel() has yet to be implemented in plotly.
If you'd like to see this geom implemented,
Please open an issue with your example code at
https://github.com/ropensci/plotly/issuesWarning: geom_GeomTextRepel() has yet to be implemented in plotly.
If you'd like to see this geom implemented,
Please open an issue with your example code at
https://github.com/ropensci/plotly/issuesWarning: geom_GeomTextRepel() has yet to be implemented in plotly.
If you'd like to see this geom implemented,
Please open an issue with your example code at
https://github.com/ropensci/plotly/issuesWarning: geom_GeomTextRepel() has yet to be implemented in plotly.
If you'd like to see this geom implemented,
Please open an issue with your example code at
https://github.com/ropensci/plotly/issuesWarning: geom_GeomTextRepel() has yet to be implemented in plotly.
If you'd like to see this geom implemented,
Please open an issue with your example code at
https://github.com/ropensci/plotly/issues